package com.cyz.api.imagesearch.sub;

import com.cyz.exception.BusinessException;
import com.cyz.exception.ErrorCode;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Fetches an image-search result page and extracts the {@code firstUrl} value
 * (the link to the image list) embedded in the page's {@code <script>} content.
 */
public class GetImageFirstUrlApi {

    /** Timeout, in milliseconds, applied to the page request. */
    private static final int REQUEST_TIMEOUT_MILLIS = 5000;

    /** Message carried by every failure raised from this class. */
    private static final String FAILURE_MESSAGE = "图片识别失败";

    /**
     * Matches {@code "firstUrl":"..."} and captures the quoted value.
     * Compiled once because {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern FIRST_URL_PATTERN =
            Pattern.compile("\"firstUrl\"\\s*:\\s*\"(.*?)\"");

    /**
     * Downloads the given search result page and returns the {@code firstUrl}
     * found in its script content, with escaped slashes ({@code \/}) restored to {@code /}.
     *
     * @param searchResultUrl URL of the search result page to fetch
     * @return the extracted {@code firstUrl} value
     * @throws BusinessException if the URL is null or blank, the page cannot be fetched,
     *                           or no {@code firstUrl} entry is present in the page scripts
     */
    public static String getImageFirstApi(String searchResultUrl) {
        // Reject unusable input up front instead of letting the HTTP client fail on it.
        if (searchResultUrl == null || searchResultUrl.trim().isEmpty()) {
            throw new BusinessException(ErrorCode.OPERATION_ERROR, FAILURE_MESSAGE);
        }

        final String scriptContent;
        try {
            // 1. Fetch the page with JSoup.
            Document document = Jsoup.connect(searchResultUrl)
                    .timeout(REQUEST_TIMEOUT_MILLIS)
                    .get();

            // 2. Collect the content of every <script> tag on the page.
            Elements scripts = document.getElementsByTag("script");
            scriptContent = scripts.html();
        } catch (IOException e) {
            BusinessException failure = new BusinessException(ErrorCode.OPERATION_ERROR, FAILURE_MESSAGE);
            // The constructor used here takes no cause, so keep the I/O error
            // reachable from the stack trace as a suppressed exception.
            failure.addSuppressed(e);
            throw failure;
        }

        // 3. Pull the firstUrl field out of the script content.
        return extractFirstUrl(scriptContent);
    }

    /**
     * Extracts the first {@code "firstUrl":"..."} value from the given script text
     * and replaces each escaped slash ({@code \/}) with a plain {@code /}.
     *
     * @param scriptContent text of the page's script tags
     * @return the captured value with escaped slashes restored
     * @throws BusinessException if the text contains no {@code firstUrl} entry
     */
    private static String extractFirstUrl(String scriptContent) {
        Matcher matcher = FIRST_URL_PATTERN.matcher(scriptContent);
        if (!matcher.find()) {
            throw new BusinessException(ErrorCode.OPERATION_ERROR, FAILURE_MESSAGE);
        }
        // Slashes arrive escaped as "\/" inside the script text; restore them.
        return matcher.group(1).replace("\\/", "/");
    }

    /**
     * Manual smoke test: fetches a sample result page and prints the extracted URL.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String firstUrl = getImageFirstApi("https://graph.baidu.com/s?card_key=&entrance=GENERAL&extUiData[isLogoShow]=1&f=all&isLogoShow=1&session_id=3763688788953230572&sign=12149293acedd5d1ca97501753177831&tpl_from=pc");
        System.out.println(firstUrl);
    }

}
